The Journal of Open Source Software (JOSS)

Bringing open source software practices to the scholarly publishing community for authors, reviewers, editors, and publishers

Published

October 6, 2024

Introduction

This report contains the code for generating figures and numbers presented in

Diehl et al (2024): The Journal of Open Source Software (JOSS) - Bringing open source software practices to the scholarly publishing community for authors, reviewers, editors, and publishers

Load packages

We first load the R packages that are needed for the analysis and plotting.

Code
## Attach every package used in this document. The wrapper suppresses
## attach-time messages so the rendered report stays clean. Note: the
## attach order matters for function masking (e.g. dplyr::filter over
## stats::filter), so do not reorder these calls.
suppressPackageStartupMessages({
    library(ggplot2)      # plotting
    library(dplyr)        # data manipulation verbs
    library(tidyr)        # reshaping / separate_longer_delim()
    library(cowplot)      # theme_cowplot(), plot_grid()
    library(ggridges)     # geom_density_ridges()
    library(lubridate)    # date handling (floor_date, year)
    library(kableExtra)   # static summary tables
    library(patchwork)    # combining ggplots
    library(purrr)        # map() family for API results
    library(gh)           # GitHub API client
    library(openalexR)    # OpenAlex citation data
    library(countrycode)  # country -> region mapping
})

Read data

Next, we download the data from https://www.theoj.org/joss-analytics/joss-submission-analytics.html.

Code
## Download the pre-computed JOSS submission data and cache it locally.
## The remote fetch only happens when the document is rendered with
## params$pull_data = TRUE; all downstream chunks read the cached copy,
## which keeps re-renders reproducible and offline-friendly.
if (params$pull_data) {
    ## gzcon(url(...)) streams the gzip-compressed RDS straight from GitHub
    papers <- readRDS(gzcon(url("https://github.com/openjournals/joss-analytics/blob/gh-pages/joss_submission_analytics.rds?raw=true")))
    saveRDS(papers, file = "papers.rds")
}
papers <- readRDS("papers.rds")

Create summary tables

For easier plotting later, we create several summary tables.

Code
## Generate publication month and year, remove two retracted papers
## Generate publication month and year, remove two retracted papers.
## NOTE(review): pubyear is created before the filter, so its factor
## levels are derived from ALL papers, including the retracted ones --
## a year represented only by retracted papers would survive as an
## empty level. Presumably harmless; confirm if levels are relied upon.
papers <- papers |>
    dplyr::mutate(pubmonth = lubridate::floor_date(published.date, "month"),
                  pubyear = factor(lubridate::year(published.date))) |>
    dplyr::filter(api_state != "retracted")

## Monthly summary - number of papers
## Monthly summary - number of papers.
## Each pubmonth belongs to exactly one calendar year, so unique()
## yields a single value per group (summarize() would error if a group
## ever produced more than one year, which acts as a sanity check).
summarydf_month <- papers |>
    dplyr::group_by(pubmonth) |>
    dplyr::summarize(npub = n(), 
                     pubyear = unique(lubridate::year(published.date)),
                     .groups = "drop")

## Yearly summary - number of papers, editors and reviewers
## Yearly summary - number of papers, editors and reviewers.
## The four joined tables all follow the same pattern (split the
## comma-separated contributor column, then count per year), so that
## pattern is factored out into a single helper.

## Expand the comma-separated column `colname` into one row per
## contributor entry, then count per publication year.
## With new_only = TRUE, each contributor is kept only in the first
## year they appear (i.e. "new" contributors that year).
## Returns a tibble with pubyear, n_unique (distinct contributors)
## and n_total (contributor entries, i.e. one per review/acceptance).
yearly_contributor_counts <- function(df, colname, new_only = FALSE) {
    long <- df |>
        dplyr::select(pubyear, dplyr::all_of(colname)) |>
        tidyr::separate_longer_delim(dplyr::all_of(colname), delim = ",")
    if (new_only) {
        ## Keep only the overall first occurrence of each contributor
        long <- long |>
            dplyr::arrange(pubyear) |>
            dplyr::filter(!duplicated(.data[[colname]]))
    }
    long |>
        dplyr::group_by(pubyear) |>
        dplyr::summarize(n_unique = length(unique(.data[[colname]])),
                         n_total = dplyr::n(),
                         .groups = "drop")
}

summarydf_year <- papers |>
    dplyr::group_by(pubyear) |>
    dplyr::summarize(npub = dplyr::n(), 
                     .groups = "drop") |>
    dplyr::full_join(
        ## editors accepting at least one paper in a given year
        yearly_contributor_counts(papers, "editor") |>
            dplyr::select(pubyear, nbr_editors = n_unique),
        by = join_by(pubyear)
    ) |>
    dplyr::full_join(
        ## editors accepting their first paper in a given year
        yearly_contributor_counts(papers, "editor", new_only = TRUE) |>
            dplyr::select(pubyear, nbr_new_editors = n_unique),
        by = join_by(pubyear)
    ) |>
    dplyr::full_join(
        ## unique reviewers and total reviews per year
        yearly_contributor_counts(papers, "reviewers") |>
            dplyr::select(pubyear, nbr_reviewers = n_unique,
                          nbr_reviews = n_total),
        by = join_by(pubyear)
    ) |>
    dplyr::full_join(
        ## reviewers reviewing their first paper in a given year
        yearly_contributor_counts(papers, "reviewers", new_only = TRUE) |>
            dplyr::select(pubyear, nbr_new_reviewers = n_unique),
        by = join_by(pubyear)
    ) |>
    dplyr::mutate(frac_new_editors = scales::percent(nbr_new_editors/nbr_editors, 
                                                     accuracy = 0.01),
                  frac_new_reviewers = scales::percent(nbr_new_reviewers/nbr_reviewers,
                                                       accuracy = 0.01))

## Total reviewer/editor counts
## Total number of distinct editors and reviewers over the whole
## period: split the comma-separated contributor columns, deduplicate,
## and count the remaining rows.
tot_nbr_editors <- papers |>
    dplyr::select(editor) |>
    tidyr::separate_longer_delim(editor, delim = ",") |>
    dplyr::distinct() |>
    nrow()

tot_nbr_reviewers <- papers |>
    dplyr::select(reviewers) |>
    tidyr::separate_longer_delim(reviewers, delim = ",") |>
    dplyr::distinct() |>
    nrow()

Monthly summary

Code
## Interactive, filterable table of the monthly summary; scrollX keeps
## wide tables usable in the rendered HTML.
DT::datatable(summarydf_month, 
              escape = FALSE, rownames = FALSE, 
              filter = list(position = 'top', clear = FALSE),
              options = list(scrollX = TRUE))

Yearly summary

Code
## Interactive, filterable table of the yearly summary (same settings
## as the monthly table above).
DT::datatable(summarydf_year,
              escape = FALSE, rownames = FALSE, 
              filter = list(position = 'top', clear = FALSE),
              options = list(scrollX = TRUE))

Define color palette

Code
## Years
yearcols <- c("#FF0066FF", "#328C97FF", "#D1AAC2FF", "#B3E0BFFF",
              "#DB7003FF", "#F8C1A6FF", "#A30000FF", "#97D1D9FF", "#916C37FF")
names(yearcols) <- as.character(2016:2024)

Number of papers published per month

The figure below shows the number of papers published each month. The overlaid curve represents a loess fit to the monthly data, generated using the ggplot2 package.

Code
## Bars: papers per month, colored by year; black curve: loess fit.
## x is a factor, so the smoother maps it to an evenly spaced numeric
## axis via as.numeric(factor(...)). Outer parentheses assign AND print
## the plot. Layer order matters here (smooth drawn on top of bars).
(gg_nbrpapers <- ggplot(summarydf_month, 
                        aes(x = factor(pubmonth), y = npub)) + 
     geom_col(aes(fill = as.character(pubyear))) + 
     geom_smooth(aes(x = as.numeric(factor(pubmonth))), method = "loess", 
                 se = FALSE, color = "black") + 
     labs(x = "Publication month", y = "Number of published\npapers per month") + 
     scale_y_continuous(expand = c(0, 0)) + 
     scale_fill_manual(name = "Year", values = yearcols) + 
     guides(fill = guide_legend(nrow = 1, byrow = TRUE, 
                                title = "Publication year")) + 
     theme_cowplot() + 
     ## Month labels are suppressed (too many to print); color encodes year
     theme(axis.title = element_text(size = 15),
           axis.text.x = element_blank(),
           axis.ticks.x = element_blank(), 
           legend.position = "bottom",
           legend.justification = "right",
           legend.margin = margin(1, 10, 1, 1)))

Number of editors per year

The next figure shows the number of editors that accept at least one paper in a given year, as well as the total number of editors that have accepted at least one paper overall. Note that the data from 2024 only include papers published until 2024-07-17.

Code
## Bars: editors accepting at least one paper per year. The annotation
## reports the all-time total (tot_nbr_editors), anchored at 90% of the
## y-axis range so it sits inside the plot area.
(gg_nbreditors <- ggplot(summarydf_year,
                         aes(x = pubyear, y = nbr_editors, fill = pubyear)) + 
     scale_fill_manual(name = "Year", values = yearcols) + 
     geom_col() +
     annotate(geom = "text", x = 1, y = 0.9 * max(summarydf_year$nbr_editors), 
              label = paste0("Total number\nof editors: ", tot_nbr_editors), 
              hjust = 0, vjust = 1, size = 5) + 
     scale_y_continuous(expand = c(0, 0)) + 
     labs(x = "Publication year", y = "Number of editors") + 
     theme_cowplot() + 
     theme(legend.position = "none"))

Number of new editors per year

We next illustrate the number of editors that accept their first paper in a given year, as well as what fraction this represents of the total number of editors accepting a paper that year. Note that the data from 2024 only include papers published until 2024-07-17.

Code
## Bars: editors accepting their FIRST paper in a given year, labelled
## with the fraction this represents of that year's active editors.
## The expanded y-range leaves room for the labels above the bars.
ggplot(summarydf_year,
       aes(x = pubyear, y = nbr_new_editors, fill = pubyear)) + 
    scale_fill_manual(name = "Year", values = yearcols) + 
    geom_col() +
    geom_text(aes(label = frac_new_editors), vjust = -0.2) + 
    scale_y_continuous(expand = expansion(mult = c(0, .1))) + 
    labs(x = "", y = "Number of new editors and\npercentage of total number of editors") + 
    theme_cowplot() + 
    theme(legend.position = "none")

Code
## Combine - show both the total number of editors and the number of new ones
ggplot(summarydf_year,
       aes(x = pubyear, fill = pubyear)) + 
    scale_fill_manual(name = "Year", values = yearcols) + 
    geom_col(aes(y = nbr_editors), alpha = 0.25) +
    geom_col(aes(y = nbr_new_editors)) + 
    geom_text(aes(y = nbr_new_editors, label = frac_new_editors), vjust = -0.2) + 
    scale_y_continuous(expand = c(0, 0)) + 
    labs(x = "", y = "Number of new editors and\npercentage of total number of editors") + 
    theme_cowplot() + 
    theme(legend.position = "none")

Number of reviewers per year

Similarly to the number of editors above, the figure below shows the number of reviewers reviewing at least one paper in a given year, as well as the total number of reviews submitted in a year. Also here, the data from 2024 only include papers published until 2024-07-17.

Code
## Bars: unique reviewers per year; grey line/points: total reviews per
## year; annotation: all-time reviewer total. The original had the
## geom_col() layer duplicated (once before and once after the line);
## only the later copy is kept, so the rendered result is unchanged
## (bars remain the topmost layer) without drawing the bars twice.
(gg_nbrreviewers <- ggplot(summarydf_year,
                           aes(x = pubyear, y = nbr_reviewers)) + 
        scale_fill_manual(name = "Year", values = yearcols) + 
        ## Grey line and points trace the total number of reviews;
        ## pubyear is a factor, so convert to numeric for a line path
        geom_line(aes(x = as.numeric(pubyear), y = nbr_reviews), color = "grey",
                  linewidth = 1.5) + 
        geom_point(aes(x = as.numeric(pubyear), y = nbr_reviews), color = "grey",
                   size = 2.5) + 
        geom_col(aes(fill = pubyear)) +
        annotate(geom = "text", x = 1, y = 0.9 * max(summarydf_year$nbr_reviews), 
                 label = paste0("Total number\nof reviewers: ", tot_nbr_reviewers), 
                 hjust = 0, vjust = 1, size = 5) + 
        scale_y_continuous(expand = expansion(mult = c(0, .05))) + 
        labs(x = "Publication year", y = "Number of reviewers\nand reviews", 
             caption = "Bars show number of unique reviewers,\ngrey line shows total number of reviews") + 
        theme_cowplot() + 
        theme(legend.position = "none"))

Number of ‘new’ reviewers per year

We also plot the number of reviewers reviewing their first paper in a given year, and calculate what fraction of the total number of reviewers that year that this represents. As above, the data from 2024 only include papers published until 2024-07-17.

Code
## Bars: reviewers reviewing their FIRST paper in a given year,
## labelled with the fraction of that year's active reviewers.
ggplot(summarydf_year,
       aes(x = pubyear, y = nbr_new_reviewers)) + 
    scale_fill_manual(name = "Year", values = yearcols) + 
    geom_col(aes(fill = pubyear)) +
    geom_text(aes(label = frac_new_reviewers), vjust = -0.2) + 
    scale_y_continuous(expand = expansion(mult = c(0, .1))) + 
    labs(x = "", y = "Number of first-time reviewers and\npercentage of total number of reviewers") + 
    theme_cowplot() + 
    theme(legend.position = "none")

Number of reviewers per submission

Here we illustrate the distribution of the number of reviewers assigned to each submission, over time. We exclude submissions that have already been reviewed at rOpenSci or pyOpenSci, since they are not re-reviewed at JOSS.

Code
## Distribution of the number of reviewers per submission, stacked by
## year (levels reversed so the earliest year sits at the bottom).
## Submissions reviewed at rOpenSci/pyOpenSci are excluded since they
## are not re-reviewed at JOSS. Fixes the axis-label typo
## "per submissions" -> "per submission".
nrev <- papers |>
    dplyr::filter(!grepl("rOpenSci|pyOpenSci", prerev_labels)) |>
    dplyr::select(pubyear, title, nbr_reviewers, doi)
(gg_nrevpersub <- ggplot(
    nrev, aes(x = nbr_reviewers, 
              fill = forcats::fct_relevel(pubyear, 
                                          rev(levels(pubyear))))) + 
        geom_bar() + 
        scale_fill_manual(values = yearcols, name = "Year") + 
        scale_y_continuous(expand = c(0, 0)) + 
        labs(x = "Number of reviewers per submission", y = "Number of submissions",
             caption = "Submissions reviewed via rOpenSci/pyOpenSci are excluded") + 
        theme_cowplot() + 
        theme(legend.position = "none"))

Since 2020, all papers are reviewed by at least two reviewers. The handful of exceptions represent two addendum papers and three cases where the editor replaced one reviewer who dropped out during the process.

Time in review

In these plots we investigate how the time a submission spends in the pre-review or review stage (or their sum) has changed over time. The curve corresponds to a rolling median for submissions over 120 days.

Code
## Helper functions (modified from https://stackoverflow.com/questions/65147186/geom-smooth-with-median-instead-of-mean)
rolling_median <- function(formula, data, xwindow = 120, ...) {
    ## Get order of x-values and sort x/y
    ordr <- order(data$x)
    x <- data$x[ordr]
    y <- data$y[ordr]
    
    ## Initialize vector for smoothed y-values
    ys <- rep(NA, length(x))
    ## Calculate median y-value for each unique x-value
    for (xs in setdiff(unique(x), NA)) {
        ## Get x-values in the window, and calculate median of corresponding y
        j <- ((xs - xwindow/2) < x) & (x < (xs + xwindow/2))
        ys[x == xs] <- median(y[j], na.rm = TRUE)
    }
    y <- ys
    structure(list(x = x, y = y, f = approxfun(x, y)), class = "rollmed")
}

## S3 predict method for the "rollmed" smoother: interpolate the fitted
## curve at the requested x positions; the result is named by those
## positions (names are coerced to character, as with setNames()).
predict.rollmed <- function(mod, newdata, ...) {
    preds <- mod$f(newdata$x)
    names(preds) <- newdata$x
    preds
}
Code
## Mean/median days spent in pre-review, in review, and in their sum,
## rendered as a two-column (label, value) table. check.names = FALSE
## keeps the human-readable column names with spaces, which become the
## row labels after pivot_longer().
data.frame(`Median number of days in pre-review` = 
               round(median(papers$days_in_pre, na.rm = TRUE), 1),
           `Mean number of days in pre-review` = 
               round(mean(papers$days_in_pre, na.rm = TRUE), 1),
           `Median number of days in review` = 
               round(median(papers$days_in_rev, na.rm = TRUE), 1),
           `Mean number of days in review` = 
               round(mean(papers$days_in_rev, na.rm = TRUE), 1),
           `Median number of days in pre-review + review` = 
               round(median(papers$days_in_pre + 
                                papers$days_in_rev, na.rm = TRUE), 1),
           `Mean number of days in pre-review + review` = 
               round(mean(papers$days_in_pre + 
                              papers$days_in_rev, na.rm = TRUE), 1),
           check.names = FALSE
           ) |>
    tidyr::pivot_longer(everything()) |>
    kableExtra::kbl(col.names = NULL) |>
    kableExtra::kable_styling()
Median number of days in pre-review 20.0 days
Mean number of days in pre-review 32.0 days
Median number of days in review 68.0 days
Mean number of days in review 93.2 days
Median number of days in pre-review + review 100.0 days
Mean number of days in pre-review + review 126.1 days
Code
## Pre-compute the median durations (rounded to 0.1 days) that go into
## the in-plot annotation, then assemble the multi-line label text.
med_pre <- round(median(papers$days_in_pre, na.rm = TRUE), 1)
med_rev <- round(median(papers$days_in_rev, na.rm = TRUE), 1)
med_tot <- round(median(papers$days_in_pre + 
                            papers$days_in_rev, na.rm = TRUE), 1)
textannot <- paste0("Median time in:\n", 
                    "Pre-review: ", med_pre, " days\n",
                    "Review: ", med_rev, " days\n",
                    "Pre-review + review: ", med_tot, " days")
Code
## Scatter: total handling time (pre-review + review, in days) against
## the date the pre-review was opened, colored by publication year.
## The black curve is the 120-day rolling median computed by the custom
## rolling_median() smoother defined above, plugged in via method =.
(gg_timeinrev <- ggplot(papers, aes(x = prerev_opened, 
                                    y = as.numeric(days_in_pre) + as.numeric(days_in_rev),
                                    color = pubyear)) + 
        geom_point() +
        annotate(geom = "text", x = as.Date("2016-10-01"), y = 950, 
                 label = textannot, hjust = 0) + 
        geom_smooth(formula = y ~ x, method = "rolling_median", 
                    se = FALSE, method.args = list(xwindow = 120),
                    color = "black") +
        scale_color_manual(values = yearcols, name = "Year") + 
        labs(x = "Date of pre-review opening", y = "Number of days in\npre-review + review") + 
        theme_cowplot() + 
        theme(legend.position = "none"))

Number of comments per review issue

Here, we count the number of comments made in the review GitHub issues for each submission. We remove comments made by the editorial bot (user name @whedon or @editorialbot). Note that issues opened in the software repositories themselves, or comments therein, are not counted.

Code
## Count non-bot comments in each submission's review issue on GitHub.
## Counts are cached in review_issue_nbr_comments.rds; only issues
## without a cached count are (re)queried, and at most 25 per render --
## presumably to stay within GitHub API rate limits (TODO confirm).
ncomments <- readRDS("review_issue_nbr_comments.rds")
## Align the cache with the current paper list
ncomments <- papers |>
    select(alternative.id, review_issue_id) |>
    left_join(ncomments, 
              by = join_by(alternative.id, review_issue_id))
## Split into already-counted and still-to-query issues
ncomments_done <- ncomments |> 
    filter(!is.na(nbr_comments_nobot))
ncomments_todo <- ncomments |>
    filter(is.na(nbr_comments_nobot))

if (nrow(ncomments_todo) > 0) {
    ## Based on code from https://github.com/jennybc/analyze-github-stuff-with-r
    ncomments <- ncomments_todo %>%
        dplyr::slice(1:25) %>%
        ## Fetch all comments for each review issue (paginated via .limit)
        mutate(res = review_issue_id %>% map(
            ~ gh(number = .x,
                 endpoint = "/repos/openjournals/joss-reviews/issues/:number/comments",
                 .limit = Inf))) %>%
        ## Extract the commenting user for every comment
        mutate(who = res %>% map(. %>% map_chr(c("user", "login")))) %>%
        select(-res) %>%
        ## Count all comments, and comments excluding the editorial bots
        mutate(nbr_comments = lengths(who),
               nbr_comments_nobot = vapply(
                   who, function(x) length(x[!x %in% c("whedon", "editorialbot")]), NA_integer_))
    ## Persist the updated cache (RDS for reuse, CSV for inspection)
    saveRDS(bind_rows(ncomments_done, ncomments), 
            file = "review_issue_nbr_comments.rds")
    write.csv(bind_rows(ncomments_done, ncomments) |> select(-who), 
              file = "review_issue_nbr_comments.csv", 
              row.names = FALSE)
}
Code
## Ridgeline densities of the per-issue (non-bot) comment counts, one
## ridge per publication year. Re-reads the cache so the plot includes
## any counts fetched in the previous chunk.
ncomments <- readRDS("review_issue_nbr_comments.rds")

(gg_nbrcomments <- ggplot(
    papers |>
        left_join(ncomments,
                  by = join_by(alternative.id, review_issue_id)), 
    aes(x = nbr_comments_nobot, y = pubyear)) + 
        geom_density_ridges(aes(fill = pubyear)) + 
        scale_fill_manual(values = yearcols, name = "Year") + 
        labs(x = "Number of comments (not by bot) in review issue", y = "") + 
        theme_cowplot() + 
        theme(legend.position = "none"))
Picking joint bandwidth of 4.33

Funding statement statistics

We performed a manual check of all papers published in JOSS in 2023 to extract information about whether any acknowledgement of funding was made. Here we summarize these numbers.

Code
## Tally the manually curated funding statements for the 2023 papers:
## explicit "yes"/"no" answers are counted directly; everything else
## (including NA) is reported as unclear.
funding <- read.csv("joss_funding_statements_2023.csv")
fstat <- funding$has.funding.statement
data.frame(`Funding acknowledgement present` = 
               sum(fstat == "yes", na.rm = TRUE),
           `No funding acknowledgement` = 
               sum(fstat == "no", na.rm = TRUE),
           `Unclear` = 
               sum(!fstat %in% c("yes", "no")),
           check.names = FALSE) |>
    tidyr::pivot_longer(everything()) |>
    kableExtra::kbl(col.names = NULL) |>
    kableExtra::kable_styling()
Funding acknowledgement present 280
No funding acknowledgement 120
Unclear 1

Author affiliation statistics

We also manually extracted information about the author affiliation countries for all papers published in JOSS in 2023. Here we summarize this information. In the tables below, n represents the number of papers with at least one author from the country or region, respectively.

Code
## Author affiliation countries for 2023 papers (manually curated).
## Produces one row per (paper, country) pair, plus the World Bank
## region for each country. Uses tidyr::separate_longer_delim() instead
## of the superseded separate_rows(), consistent with the rest of this
## document (the delimiter is a plain comma in both cases).
affcountries <- read.csv("joss_affiliation_statements_2023.csv")

affcountries <- affcountries |> select(alternative.id, affiliation.countries) |> 
    filter(!affiliation.countries %in% c("(none, no affiliation)", "unknown", "")) |>
    tidyr::separate_longer_delim(affiliation.countries, delim = ",") |> 
    mutate(affiliation.countries = trimws(affiliation.countries)) |>
    distinct() |> 
    mutate(region = countrycode::countrycode(affiliation.countries,
                                             origin = "country.name",
                                             destination = "region"))

Countries

Code
## Top countries (in terms of number of papers with at least one author from there)
affcountries |>
    group_by(affiliation.countries) |> 
    summarize(n = length(unique(alternative.id))) |> 
    arrange(desc(n)) |>
    kableExtra::kbl(col.names = NULL) |>
    kableExtra::kable_styling()
USA 153
Germany 80
UK 53
France 42
Canada 24
Australia 20
Netherlands 20
Norway 20
Switzerland 20
Italy 18
Belgium 12
Japan 12
Austria 11
Denmark 10
Sweden 10
China 9
Finland 8
Poland 8
Spain 7
Brazil 6
India 5
Czechia 4
Greece 3
Israel 3
South Korea 3
Argentina 2
Portugal 2
Serbia 2
Taiwan 2
Algeria 1
Colombia 1
Cuba 1
Cyprus 1
Estonia 1
Hungary 1
Ireland 1
Jordan 1
Latvia 1
Mexico 1
Morocco 1
Paraguay 1
Romania 1
Saudi Arabia 1
Singapore 1
South Africa 1
Turkey 1
Ukraine 1

Regions

Code
## Similarly, top regions
affcountries |>
    group_by(region) |> 
    summarize(n = length(unique(alternative.id))) |> 
    arrange(desc(n)) |> 
    kableExtra::kbl(col.names = NULL) |>
    kableExtra::kable_styling()
Europe & Central Asia 254
North America 171
East Asia & Pacific 41
Latin America & Caribbean 12
Middle East & North Africa 7
South Asia 5
Sub-Saharan Africa 1

Citation statistics

Finally, we calculate some statistics related to the citation of JOSS and SoftwareX papers. This information has been retrieved from OpenAlex, using the openalexR R package.

All papers

Code
## All papers
tmp <- papers$citation_count
cat("Number of papers: ", length(tmp), "\n")
cat("Number of citations: ", sum(tmp, na.rm = TRUE), "\n")
cat("Summary statistics: \n")
summary(tmp)
Number of papers:  2564 
Number of citations:  67542 
Summary statistics: 
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
    0.00     0.00     3.00    26.41     9.00 11020.00        7 

The most cited paper:

Code
## Identify and print the most cited JOSS paper. author is a list
## column, so [[maxcit]] extracts the author table of that paper;
## the first family name stands in for "et al".
maxcit <- which.max(papers$citation_count)
first_author <- papers$author[[maxcit]]$family[1]
cat(paste0(first_author, " et al (", 
           papers$pubyear[maxcit], "): ", papers$title[maxcit], 
           " (", papers$citation_count[maxcit], " citations)"))
Wickham et al (2019): Welcome to the Tidyverse (11020 citations)

Papers published in 2016-2023

Code
## Papers published in 2016-2023
tmp <- papers$citation_count[papers$pubyear %in% c(2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023)]
cat("Number of papers: ", length(tmp), "\n")
cat("Number of citations: ", sum(tmp, na.rm = TRUE), "\n")
cat("Summary statistics: \n")
summary(tmp)
Number of papers:  2264 
Number of citations:  67458 
Summary statistics: 
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    0.0     1.0     3.0    29.8    10.0 11020.0 

SoftwareX papers

Code
## Fetch all SoftwareX articles from OpenAlex (source id s2506067282),
## keeping only records of type "article", and cache the fields needed
## below together with the query date. As with the JOSS data, the
## remote fetch only runs when params$pull_data = TRUE.
if (params$pull_data) {
    softwarex <- oa_fetch(entity = "works", 
                          primary_location.source.id = "s2506067282") |>
        dplyr::mutate(pubyear = year(as.Date(publication_date))) |>
        dplyr::filter(type == "article")
    saveRDS(softwarex |> 
                select(id, doi, title, publication_date, so, pubyear, 
                       cited_by_count) |>
                mutate(query_date = today()),
            file = "openalex_softwarex.rds")
}
softwarex <- readRDS("openalex_softwarex.rds")

All papers

Code
## Citation summary over all SoftwareX papers (for comparison with the
## JOSS numbers above).
tmp <- softwarex$cited_by_count
cat("Number of papers: ", length(tmp), "\n")
cat("Number of citations: ", sum(tmp, na.rm = TRUE), "\n")
cat("Summary statistics: \n")
summary(tmp)
Number of papers:  1377 
Number of citations:  28232 
Summary statistics: 
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    0.0     0.0     2.0    20.5     8.0 15709.0 

Papers published in 2016-2023

Code
## SoftwareX papers published in 2016-2023 (excludes the partial 2024
## data, mirroring the JOSS subset above).
tmp <- softwarex$cited_by_count[softwarex$pubyear %in% c(2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023)]
cat("Number of papers: ", length(tmp), "\n")
cat("Number of citations: ", sum(tmp, na.rm = TRUE), "\n")
cat("Summary statistics: \n")
summary(tmp)
Number of papers:  1154 
Number of citations:  12058 
Summary statistics: 
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   0.00    1.00    3.00   10.45   10.00  442.00 

Put together final figures

Code
## Overwrite the get_legend function from cowplot temporarily, 
## since the cowplot one doesn't work with ggplot2 3.5.0
## see https://github.com/wilkelab/cowplot/issues/202

## Overwrite the get_legend function from cowplot temporarily,
## since the cowplot one doesn't work with ggplot2 3.5.0
## see https://github.com/wilkelab/cowplot/issues/202
##
## Extract the (first non-empty) legend grob from a ggplot. With
## ggplot2 >= 3.5.0 the gtable contains several "guide-box-*" slots,
## most of which are zeroGrob placeholders; those are skipped.
## Returns NULL when the plot has no drawable legend.
get_legend <- function(plot, legend = NULL) {
    gt <- ggplotGrob(plot)

    ## Match either every guide box, or one specific position
    pattern <- if (is.null(legend)) "guide-box" else paste0("guide-box-", legend)
    idx <- grep(pattern, gt$layout$name)

    ## Drop empty placeholder slots (zeroGrob)
    keep <- !vapply(gt$grobs[idx], inherits, logical(1), what = "zeroGrob")
    idx <- idx[keep]

    if (length(idx) == 0) {
        return(NULL)
    }
    gt$grobs[[idx[1]]]
}

Figure 1

Code
## Figure 1: papers per month (panel A) and editors per year (panel B).
## Panel legends are suppressed; the shared year legend is harvested
## from gg_nbrpapers and placed underneath via plot_grid().
gg1 <- gg_nbrpapers + gg_nbreditors + 
    plot_annotation(tag_levels = "A", 
                    caption = paste0("Includes data until ", max(papers$published.date))) + 
    plot_layout(ncol = 2, guides = "collect") & 
    theme(legend.position = "none",
          plot.caption = element_text(size = 13))
gg2 <- get_legend(gg_nbrpapers)
plot_grid(gg1, gg2, ncol = 1, rel_heights = c(3.3, 0.7))

Figure 3

Code
## Figure 3: reviewers per submission (A) and reviewers/reviews per
## year (B). The legend is taken from gg_nbrpapers -- presumably
## intentional since all panels share the same year palette.
gg1 <- gg_nrevpersub + gg_nbrreviewers + 
    plot_annotation(tag_levels = "A", 
                    caption = paste0("Includes data until ", max(papers$published.date))) + 
    plot_layout(ncol = 2, guides = "collect") & 
    theme(legend.position = "none",
          plot.caption = element_text(size = 13))
gg2 <- get_legend(gg_nbrpapers)
plot_grid(gg1, gg2, ncol = 1, rel_heights = c(3.3, 0.7))

Figure 4

Code
## Figure 4: review-issue comment counts (A) and time in review (B).
## As in the other figures, the shared year legend comes from
## gg_nbrpapers and is appended below the panel row.
gg1 <- gg_nbrcomments + gg_timeinrev + 
    plot_annotation(tag_levels = "A", 
                    caption = paste0("Includes data until ", max(papers$published.date))) + 
    plot_layout(ncol = 2, guides = "collect") & 
    theme(legend.position = "none",
          plot.caption = element_text(size = 13))
gg2 <- get_legend(gg_nbrpapers)
plot_grid(gg1, gg2, ncol = 1, rel_heights = c(3.3, 0.7))

Session info

Session info
R version 4.4.1 (2024-06-14)
Platform: aarch64-apple-darwin20
Running under: macOS Sonoma 14.7

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Europe/Zurich
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] countrycode_1.6.0 openalexR_1.4.0   gh_1.4.1          purrr_1.0.2      
 [5] patchwork_1.3.0   kableExtra_1.4.0  lubridate_1.9.3   ggridges_0.5.6   
 [9] cowplot_1.1.3     tidyr_1.3.1       dplyr_1.1.4       ggplot2_3.5.1    

loaded via a namespace (and not attached):
 [1] sass_0.4.9        utf8_1.2.4        generics_0.1.3    xml2_1.3.6       
 [5] lattice_0.22-6    stringi_1.8.4     digest_0.6.37     magrittr_2.0.3   
 [9] evaluate_1.0.0    grid_4.4.1        timechange_0.3.0  fastmap_1.2.0    
[13] Matrix_1.7-0      jsonlite_1.8.9    mgcv_1.9-1        fansi_1.0.6      
[17] crosstalk_1.2.1   viridisLite_0.4.2 scales_1.3.0      jquerylib_0.1.4  
[21] cli_3.6.3         rlang_1.1.4       splines_4.4.1     munsell_0.5.1    
[25] cachem_1.1.0      withr_3.0.1       yaml_2.3.10       tools_4.4.1      
[29] colorspace_2.1-1  DT_0.33           forcats_1.0.0     vctrs_0.6.5      
[33] R6_2.5.1          lifecycle_1.0.4   stringr_1.5.1     htmlwidgets_1.6.4
[37] pkgconfig_2.0.3   bslib_0.8.0       pillar_1.9.0      gtable_0.3.5     
[41] glue_1.7.0        systemfonts_1.1.0 highr_0.11        xfun_0.47        
[45] tibble_3.2.1      tidyselect_1.2.1  rstudioapi_0.16.0 knitr_1.48       
[49] farver_2.1.2      nlme_3.1-164      htmltools_0.5.8.1 labeling_0.4.3   
[53] rmarkdown_2.28    svglite_2.1.3     compiler_4.4.1